library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(viridis)
## Loading required package: viridisLite
"%&%" = function(a,b) paste(a,b,sep="")
my.dir = "/home/lauren/files_for_revisions_plosgen/fst_results/"
#my.dir = "~/mount/lauren/files_for_revisions_plosgen/fst_results/"
r2.dir <- '/home/lauren/files_for_revisions_plosgen/en_v7/new_output/'
afa_test <- read.table(r2.dir %&% 'AFA_nested_cv_all_model_summaries_10_peer_3pcs.txt',header=T) %>% 
  mutate(R2_AFA=ifelse(test_R2_avg<0,NA,test_R2_avg)) %>% select(gene_id,R2_AFA)
cau_test <- read.table(r2.dir %&% 'CAU_nested_cv_all_model_summaries_10_peer_3pcs.txt',header=T) %>% 
  mutate(R2_CAU=ifelse(test_R2_avg<0,NA,test_R2_avg)) %>% select(gene_id,R2_CAU)
his_test <- read.table(r2.dir %&% 'HIS_nested_cv_all_model_summaries_10_peer_3pcs.txt',header=T) %>% 
  mutate(R2_HIS=ifelse(test_R2_avg<0,NA,test_R2_avg)) %>% select(gene_id,R2_HIS)
#try not setting to 0
# afa_test <- read.table(r2.dir %&% 'AFA_nested_cv_all_model_summaries_10_peer_3pcs.txt',header=T) %>% 
#   mutate(R2_AFA=test_R2_avg) %>% select(gene_id,R2_AFA)
# cau_test <- read.table(r2.dir %&% 'CAU_nested_cv_all_model_summaries_10_peer_3pcs.txt',header=T) %>% 
#   mutate(R2_CAU=test_R2_avg) %>% select(gene_id,R2_CAU)
# his_test <- read.table(r2.dir %&% 'HIS_nested_cv_all_model_summaries_10_peer_3pcs.txt',header=T) %>% 
#   mutate(R2_HIS=test_R2_avg) %>% select(gene_id,R2_HIS)
# afa_test <- read.table(r2.dir %&% 'AFA_nested_cv_all_model_summaries_10_peer_3pcs.txt',header=T) %>% 
#   mutate(R2_AFA=ifelse(test_R2_avg<(-10),NA,test_R2_avg)) %>% select(gene_id,R2_AFA)
# cau_test <- read.table(r2.dir %&% 'CAU_nested_cv_all_model_summaries_10_peer_3pcs.txt',header=T) %>% 
#   mutate(R2_CAU=ifelse(test_R2_avg<(-10),NA,test_R2_avg)) %>% select(gene_id,R2_CAU)
# his_test <- read.table(r2.dir %&% 'HIS_nested_cv_all_model_summaries_10_peer_3pcs.txt',header=T) %>% 
#   mutate(R2_HIS=ifelse(test_R2_avg<(-10),NA,test_R2_avg)) %>% select(gene_id,R2_HIS)


r2_test <- left_join(afa_test,cau_test,by='gene_id')
r2_test <- left_join(r2_test,his_test,by='gene_id')

AFA

afa <- fread(my.dir %&% "fst_table_AFA.txt")
mean_afa <- afa[,list(mean_fstCAUafa=mean(fstCAUafa, na.rm = TRUE),mean_fstAFAhis=mean(fstAFAhis, na.rm = TRUE),
  betaAFA_fstCAUafa=sum(fstCAUafa * abs(betaAFA), na.rm = TRUE)/table(is.na(fstCAUafa * betaAFA))[[1]],
  betaAFA_fstAFAhis=sum(fstAFAhis * abs(betaAFA), na.rm = TRUE)/table(is.na(fstAFAhis * betaAFA))[[1]]),
  by=GENE]
mean_afa <- left_join(mean_afa,r2_test,by=c('GENE'='gene_id'))
## Warning: Column `GENE`/`gene_id` joining character vector and factor,
## coercing into character vector
mean_afa_0.2 <- dplyr::filter(mean_afa, R2_AFA > 0.2 | R2_CAU > 0.2)
# ggplot(mean_afa_0.2, aes(x=R2_AFA-R2_CAU,y=betaAFA_fstCAUafa)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
# ggplot(mean_afa_0.2, aes(x=R2_AFA-R2_CAU,y=mean_fstCAUafa)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

mean_afa_0.2 <- dplyr::filter(mean_afa, R2_AFA > 0.2 | R2_HIS > 0.2)
# ggplot(mean_afa_0.2, aes(x=R2_AFA-R2_HIS,y=betaAFA_fstAFAhis)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
# ggplot(mean_afa_0.2, aes(x=R2_AFA-R2_HIS,y=mean_fstAFAhis)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

CAU

cau <- fread(my.dir %&% "fst_table_CAU.txt")
mean_cau <- cau[,list(mean_fstCAUafa=mean(fstCAUafa, na.rm = TRUE),mean_fstHIScau=mean(fstHIScau, na.rm = TRUE),
  betaCAU_fstCAUafa=sum(fstCAUafa * abs(betaCAU), na.rm = TRUE)/table(is.na(fstCAUafa * betaCAU))[[1]],
  betaCAU_fstHIScau=sum(fstHIScau * abs(betaCAU), na.rm = TRUE)/table(is.na(fstHIScau * betaCAU))[[1]]),
  by=GENE]
mean_cau <- left_join(mean_cau,r2_test,by=c('GENE'='gene_id'))
## Warning: Column `GENE`/`gene_id` joining character vector and factor,
## coercing into character vector
mean_cau_0.2 <- dplyr::filter(mean_cau, R2_AFA > 0.1 | R2_CAU > 0.1)
# ggplot(mean_cau_0.2, aes(x=R2_CAU-R2_AFA,y=betaCAU_fstCAUafa)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
# ggplot(mean_cau_0.2, aes(x=R2_CAU-R2_AFA,y=mean_fstCAUafa)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

mean_cau_0.2 <- dplyr::filter(mean_cau, R2_CAU > 0.1 | R2_HIS > 0.1)
# ggplot(mean_cau_0.2, aes(x=R2_CAU-R2_HIS,y=betaCAU_fstHIScau)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
# ggplot(mean_cau_0.2, aes(x=R2_CAU-R2_HIS,y=mean_fstHIScau)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

HIS

his <- fread(my.dir %&% "fst_table_HIS.txt")
mean_his <- his[,list(mean_fstAFAhis=mean(fstAFAhis, na.rm = TRUE),mean_fstHIScau=mean(fstHIScau, na.rm = TRUE),
  betaHIS_fstAFAhis=sum(fstAFAhis * abs(betaHIS), na.rm = TRUE)/table(is.na(fstAFAhis * betaHIS))[[1]],
  betaHIS_fstHIScau=sum(fstHIScau * abs(betaHIS), na.rm = TRUE)/table(is.na(fstHIScau * betaHIS))[[1]]),
  by=GENE]
mean_his <- left_join(mean_his,r2_test,by=c('GENE'='gene_id'))
## Warning: Column `GENE`/`gene_id` joining character vector and factor,
## coercing into character vector
mean_his_0.2 <- dplyr::filter(mean_his, R2_AFA > 0.1 | R2_HIS > 0.1)
# ggplot(mean_his_0.2, aes(x=R2_HIS-R2_AFA,y=betaHIS_fstAFAhis)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
# ggplot(mean_his_0.2, aes(x=R2_HIS-R2_AFA,y=mean_fstAFAhis)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

mean_his_0.2 <- dplyr::filter(mean_his, R2_CAU > 0.1 | R2_HIS > 0.1)
# ggplot(mean_his_0.2, aes(x=R2_HIS-R2_CAU,y=betaHIS_fstHIScau)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
# ggplot(mean_his_0.2, aes(x=R2_HIS-R2_CAU,y=mean_fstHIScau)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

facet_wrap plots

afa_cau <- dplyr::select(mean_afa_0.2, GENE, pop1=R2_AFA, pop2=R2_CAU, mean_Fst=mean_fstCAUafa, beta_Fst=betaAFA_fstCAUafa) %>% 
  mutate(pop1=ifelse(is.na(pop1),0,pop1),pop2=ifelse(is.na(pop2),0,pop2)) %>% 
  mutate(pop="AFA-CAU",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

afa_his <- dplyr::select(mean_afa_0.2, GENE, pop1=R2_AFA, pop2=R2_HIS, mean_Fst=mean_fstAFAhis, beta_Fst=betaAFA_fstAFAhis) %>% 
  mutate(pop1=ifelse(is.na(pop1),0,pop1),pop2=ifelse(is.na(pop2),0,pop2)) %>% 
  mutate(pop="AFA-HIS",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

cau_afa <- dplyr::select(mean_cau_0.2, GENE, pop2=R2_AFA, pop1=R2_CAU, mean_Fst=mean_fstCAUafa, beta_Fst=betaCAU_fstCAUafa) %>% 
  mutate(pop1=ifelse(is.na(pop1),0,pop1),pop2=ifelse(is.na(pop2),0,pop2)) %>% 
  mutate(pop="CAU-AFA",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

cau_his <- dplyr::select(mean_cau_0.2, GENE, pop1=R2_CAU, pop2=R2_HIS, mean_Fst=mean_fstHIScau, beta_Fst=betaCAU_fstHIScau) %>% 
  mutate(pop1=ifelse(is.na(pop1),0,pop1),pop2=ifelse(is.na(pop2),0,pop2)) %>% 
  mutate(pop="CAU-HIS",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

his_afa <- dplyr::select(mean_his_0.2, GENE, pop2=R2_AFA, pop1=R2_HIS, mean_Fst=mean_fstAFAhis, beta_Fst=betaHIS_fstAFAhis) %>% 
  mutate(pop1=ifelse(is.na(pop1),0,pop1),pop2=ifelse(is.na(pop2),0,pop2)) %>% 
  mutate(pop="HIS-AFA",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

his_cau <- dplyr::select(mean_his_0.2, GENE, pop2=R2_CAU, pop1=R2_HIS, mean_Fst=mean_fstHIScau, beta_Fst=betaHIS_fstHIScau) %>% 
  mutate(pop1=ifelse(is.na(pop1),0,pop1),pop2=ifelse(is.na(pop2),0,pop2)) %>% 
  mutate(pop="HIS-CAU",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

all <- rbind(afa_cau,afa_his,cau_afa,cau_his,his_afa,his_cau)

all <- mutate(all,pop=factor(pop,levels=c("AFA-CAU","CAU-AFA","AFA-HIS","HIS-AFA","HIS-CAU","CAU-HIS")))
all <- all[complete.cases(all),]

sfig1<-ggplot(all, aes(x=diffR2,y=beta_Fst)) + facet_wrap(~pop,nrow=3) + 
  geom_point(col="gray")+ geom_vline(xintercept=0)  + geom_density_2d() + 
  labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("weighted mean ", F[ST]))) + 
  theme_bw(14)
print(sfig1)

sfig2 <-ggplot(all, aes(x=diffR2,y=beta_Fst)) + facet_wrap(~pop,nrow=3)  +
  geom_point(col="gray") + geom_vline(xintercept=0) + geom_density_2d() + 
  labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("ZOOM weighted mean ", F[ST]))) + 
  theme_bw(14) + coord_cartesian(ylim=c(0,0.02)) 
print(sfig2)

ggplot(all, aes(x=diffR2,y=mean_Fst)) + facet_wrap(~pop,nrow=3) + geom_point(col='gray') + geom_vline(xintercept=0) + geom_density_2d() + labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("mean ", F[ST]))) + theme_bw(14) 

for(thresh in c(0.05, 0.1, 0.2, 0.3)){
  cat(thresh, '\n')
  newall <- mutate(all,diffGroup=ifelse(diffR2 < (-1*thresh) | diffR2 > thresh, 'large', 'small'))
  print(with(newall, t.test(mean_Fst[diffGroup=='large'], mean_Fst[diffGroup=='small'])))
 # print(ggplot(newall, aes(x=mean_Fst,fill=diffGroup)) + geom_density(alpha=0.7) + scale_fill_viridis(discrete=TRUE))
  print(with(newall, t.test(beta_Fst[diffGroup=='large'], beta_Fst[diffGroup=='small'])))
  #print(ggplot(newall, aes(x=beta_Fst,fill=diffGroup)) + geom_density(alpha=0.7) + scale_fill_viridis(discrete=TRUE))
}
## 0.05 
## 
##  Welch Two Sample t-test
## 
## data:  mean_Fst[diffGroup == "large"] and mean_Fst[diffGroup == "small"]
## t = 15.6, df = 7488.9, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.009609348 0.012371469
## sample estimates:
##  mean of x  mean of y 
## 0.04416641 0.03317600 
## 
## 
##  Welch Two Sample t-test
## 
## data:  beta_Fst[diffGroup == "large"] and beta_Fst[diffGroup == "small"]
## t = 14.559, df = 9388.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0006190104 0.0008116344
## sample estimates:
##   mean of x   mean of y 
## 0.002092818 0.001377496 
## 
## 0.1 
## 
##  Welch Two Sample t-test
## 
## data:  mean_Fst[diffGroup == "large"] and mean_Fst[diffGroup == "small"]
## t = 20.873, df = 10877, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01295748 0.01564339
## sample estimates:
##  mean of x  mean of y 
## 0.04867305 0.03437261 
## 
## 
##  Welch Two Sample t-test
## 
## data:  beta_Fst[diffGroup == "large"] and beta_Fst[diffGroup == "small"]
## t = 17.409, df = 8688.5, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0008328352 0.0010441921
## sample estimates:
##   mean of x   mean of y 
## 0.002390336 0.001451822 
## 
## 0.2 
## 
##  Welch Two Sample t-test
## 
## data:  mean_Fst[diffGroup == "large"] and mean_Fst[diffGroup == "small"]
## t = 17.428, df = 2286.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0163011 0.0204347
## sample estimates:
##  mean of x  mean of y 
## 0.05650797 0.03814007 
## 
## 
##  Welch Two Sample t-test
## 
## data:  beta_Fst[diffGroup == "large"] and beta_Fst[diffGroup == "small"]
## t = 11.548, df = 2012, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.001000586 0.001409942
## sample estimates:
##   mean of x   mean of y 
## 0.002904367 0.001699103 
## 
## 0.3 
## 
##  Welch Two Sample t-test
## 
## data:  mean_Fst[diffGroup == "large"] and mean_Fst[diffGroup == "small"]
## t = 12.528, df = 686.89, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01558265 0.02137461
## sample estimates:
##  mean of x  mean of y 
## 0.05845271 0.03997408 
## 
## 
##  Welch Two Sample t-test
## 
## data:  beta_Fst[diffGroup == "large"] and beta_Fst[diffGroup == "small"]
## t = 6.558, df = 625.09, p-value = 1.143e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.001056169 0.001959061
## sample estimates:
##   mean of x   mean of y 
## 0.003311798 0.001804183
all1 <- mutate(all,diffGroup=ifelse(diffR2 >= (-1*0.1) & diffR2 <= 0.1, '<= t', '> t'),absdiffR2=abs(diffR2),thres='t = 0.1')
all2 <- mutate(all,diffGroup=ifelse(diffR2 >= (-1*0.2) & diffR2 <= 0.2, '<= t', '> t'),absdiffR2=abs(diffR2),thres='t = 0.2')
all3 <- mutate(all,diffGroup=ifelse(diffR2 >= (-1*0.3) & diffR2 <= 0.3, '<= t', '> t'),absdiffR2=abs(diffR2),thres='t = 0.3')

with(all1,t.test(mean_Fst[diffGroup=='> t'], mean_Fst[diffGroup=='<= t']))$p.value
## [1] 6.729618e-95
with(all2,t.test(mean_Fst[diffGroup=='> t'], mean_Fst[diffGroup=='<= t']))$p.value
## [1] 5.765816e-64
with(all3,t.test(mean_Fst[diffGroup=='> t'], mean_Fst[diffGroup=='<= t']))$p.value
## [1] 1.413181e-32
newall <- rbind(all1,all2,all3)
newall <- mutate(newall,diffGroup=factor(diffGroup,levels=c('> t', '<= t')))
c <- ggplot(newall, aes(x=mean_Fst,fill=diffGroup)) + geom_density(alpha=0.7)+ facet_wrap(~thres) +
 labs(x=expression(paste("mean ", F[ST])),title='C',fill=expression(paste("|",R^2, " difference|"))) + theme_bw(14) +  theme(plot.margin=unit(c(0,0.2,0,0.2), "cm")) + scale_fill_viridis(discrete = TRUE) + coord_cartesian(xlim=c(0,0.2)) + theme(legend.justification=c(0,1), legend.position=c(0.85,0.995),legend.title = element_text(size=10),legend.text = element_text(size=10),legend.key.size = unit(0.6, "cm"),plot.margin=unit(c(0,0.2,0,0.2), "cm"))
print(c)

ggplot(newall, aes(x=beta_Fst,fill=diffGroup)) + geom_density(alpha=0.7)+ facet_wrap(~thres) +
 labs(x=expression(paste("weighted ", F[ST])),fill=expression(paste("|",R^2, " difference|"))) + theme_bw(14) +  theme(plot.margin=unit(c(0,0.2,0,0.2), "cm")) + scale_fill_viridis(discrete = TRUE) + coord_cartesian(xlim=c(-0.001,0.01))+
  theme(legend.justification=c(0,1), legend.position=c(0.85,0.99),legend.title = element_text(size=10),legend.text = element_text(size=10),legend.key.size = unit(0.6, "cm"),plot.margin=unit(c(0,0.2,0,0.2), "cm"),axis.text.x = element_text(angle = 90, hjust = 1))

suball <- dplyr::filter(all, pop=='AFA-CAU' | pop=='AFA-HIS' | pop=='HIS-CAU')
for(thresh in c(0.1, 0.2, 0.3)){
  cat(thresh, '\n')
  newall <- mutate(suball,diffGroup=ifelse(diffR2 < (-1*thresh) | diffR2 > thresh, 'large', 'small'))
  print(with(newall, t.test(mean_Fst[diffGroup=='large'], mean_Fst[diffGroup=='small'])))
  print(ggplot(newall, aes(x=mean_Fst,fill=diffGroup)) + geom_density(alpha=0.7) + scale_fill_viridis(discrete=TRUE))
  print(ggplot(newall, aes(x=mean_Fst,fill=diffGroup)) + geom_density(alpha=0.7) + scale_fill_viridis(discrete=TRUE) + facet_wrap(~pop,scales="free"))
  print(with(newall, t.test(beta_Fst[diffGroup=='large'], beta_Fst[diffGroup=='small'])))
  print(ggplot(newall, aes(x=beta_Fst,fill=diffGroup)) + geom_density(alpha=0.7) + scale_fill_viridis(discrete=TRUE))
  print(ggplot(newall, aes(x=beta_Fst,fill=diffGroup)) + geom_density(alpha=0.7) + scale_fill_viridis(discrete=TRUE) + facet_wrap(~pop,scales="free"))
}
## 0.1 
## 
##  Welch Two Sample t-test
## 
## data:  mean_Fst[diffGroup == "large"] and mean_Fst[diffGroup == "small"]
## t = 16.066, df = 4581.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01492662 0.01907587
## sample estimates:
##  mean of x  mean of y 
## 0.04773017 0.03072893

## 
##  Welch Two Sample t-test
## 
## data:  beta_Fst[diffGroup == "large"] and beta_Fst[diffGroup == "small"]
## t = 13.45, df = 3740.1, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0009650713 0.0012944425
## sample estimates:
##   mean of x   mean of y 
## 0.002497145 0.001367388

## 0.2 
## 
##  Welch Two Sample t-test
## 
## data:  mean_Fst[diffGroup == "large"] and mean_Fst[diffGroup == "small"]
## t = 14.386, df = 1176.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01931168 0.02541101
## sample estimates:
##  mean of x  mean of y 
## 0.05712034 0.03475899

## 
##  Welch Two Sample t-test
## 
## data:  beta_Fst[diffGroup == "large"] and beta_Fst[diffGroup == "small"]
## t = 9.4746, df = 1103.5, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.001005916 0.001531365
## sample estimates:
##   mean of x   mean of y 
## 0.002943128 0.001674487

## 0.3 
## 
##  Welch Two Sample t-test
## 
## data:  mean_Fst[diffGroup == "large"] and mean_Fst[diffGroup == "small"]
## t = 10.083, df = 353.46, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01711581 0.02541064
## sample estimates:
##  mean of x  mean of y 
## 0.05875588 0.03749265

## 
##  Welch Two Sample t-test
## 
## data:  beta_Fst[diffGroup == "large"] and beta_Fst[diffGroup == "small"]
## t = 5.8746, df = 323.85, p-value = 1.052e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0009856182 0.0019781239
## sample estimates:
##   mean of x   mean of y 
## 0.003294473 0.001812602

ggplot(suball, aes(x=diffR2,y=mean_Fst)) + facet_wrap(~pop,nrow=1)  + geom_point(col='gray')+ geom_vline(xintercept=0) + 
  geom_density_2d() + labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("mean ", F[ST])),title='C') + theme_bw(14) + coord_cartesian(xlim=c(-0.85,0.85)) + theme(plot.margin=unit(c(0,0.2,0,0.2), "cm")) + scale_color_viridis(discrete = TRUE)

no filter

afa_cau <- dplyr::select(mean_afa, GENE, pop1=R2_AFA, pop2=R2_CAU, mean_Fst=mean_fstCAUafa, beta_Fst=betaAFA_fstCAUafa) %>% 
  mutate(pop1=ifelse(is.na(pop1),0,pop1),pop2=ifelse(is.na(pop2),0,pop2)) %>% 
  mutate(pop="AFA-CAU",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

afa_his <- dplyr::select(mean_afa, GENE, pop1=R2_AFA, pop2=R2_HIS, mean_Fst=mean_fstAFAhis, beta_Fst=betaAFA_fstAFAhis) %>% 
  mutate(pop1=ifelse(is.na(pop1),0,pop1),pop2=ifelse(is.na(pop2),0,pop2)) %>%
  mutate(pop="AFA-HIS",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

cau_afa <- dplyr::select(mean_cau, GENE, pop2=R2_AFA, pop1=R2_CAU, mean_Fst=mean_fstCAUafa, beta_Fst=betaCAU_fstCAUafa) %>% 
  mutate(pop1=ifelse(is.na(pop1),0,pop1),pop2=ifelse(is.na(pop2),0,pop2)) %>%
  mutate(pop="CAU-AFA",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

cau_his <- dplyr::select(mean_cau, GENE, pop1=R2_CAU, pop2=R2_HIS, mean_Fst=mean_fstHIScau, beta_Fst=betaCAU_fstHIScau) %>% 
  mutate(pop1=ifelse(is.na(pop1),0,pop1),pop2=ifelse(is.na(pop2),0,pop2)) %>%
  mutate(pop="CAU-HIS",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

his_afa <- dplyr::select(mean_his, GENE, pop2=R2_AFA, pop1=R2_HIS, mean_Fst=mean_fstAFAhis, beta_Fst=betaHIS_fstAFAhis) %>% 
  mutate(pop1=ifelse(is.na(pop1),0,pop1),pop2=ifelse(is.na(pop2),0,pop2)) %>%
  mutate(pop="HIS-AFA",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

his_cau <- dplyr::select(mean_his, GENE, pop2=R2_CAU, pop1=R2_HIS, mean_Fst=mean_fstHIScau, beta_Fst=betaHIS_fstHIScau) %>% 
  mutate(pop1=ifelse(is.na(pop1),0,pop1),pop2=ifelse(is.na(pop2),0,pop2)) %>%
  mutate(pop="HIS-CAU",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)

all_nof <- rbind(afa_cau,afa_his,cau_afa,cau_his,his_afa,his_cau)
all_nof <- all_nof[complete.cases(all_nof),]
ggplot(all_nof, aes(x=diffR2,y=pop1,col=pop2)) + facet_wrap(~pop,nrow=3) + geom_point() +  labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2))) + theme_bw(14) + 
  geom_density_2d()

suball_nof <- dplyr::filter(all_nof, pop=='AFA-CAU' | pop=='AFA-HIS' | pop=='HIS-CAU') 

b <- ggplot(suball_nof, aes(x=diffR2,y=pop1,col=pop2)) + facet_wrap(~pop,nrow=1) + geom_point() +  labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2)),title='B') + theme_bw(14) + 
  scale_color_viridis() + 
  theme(legend.justification=c(0,1), legend.position=c(0.005,0.995),legend.title = element_text(size=10),legend.text = element_text(size=10),legend.key.size = unit(0.3, "cm"),plot.margin=unit(c(0,0.2,0,0.2), "cm")) + 
  coord_cartesian(xlim=c(-0.85,0.85)) + geom_density_2d(col='gray') + scale_size(range=c(5,20))
print(b)

a <- ggplot(suball_nof, aes(x=pop1,y=pop2)) + geom_point() + geom_abline(slope=1,intercept=0,col='blue') + facet_wrap(~pop,nrow=1) + theme_bw(14)+
  labs(x=expression(paste("pop1 ", R^2)),y=expression(paste("pop2 ", R^2)),title='A') + geom_density_2d(col="gray") +
  theme(plot.margin=unit(c(0.05,0.2,0,0.2), "cm"))
dt_suball_nof <- data.table(suball_nof)
dt_suball_nof[,list(R=cor(pop1,pop2,use='p')),by=pop]
##        pop         R
## 1: AFA-CAU 0.7548007
## 2: AFA-HIS 0.8346441
## 3: HIS-CAU 0.9177133
print(a)

#make Table 3
ac <- dplyr::filter(dt_suball_nof,pop=='AFA-CAU')
ah <- dplyr::filter(dt_suball_nof,pop=='AFA-HIS')
ch <- dplyr::filter(dt_suball_nof,pop=='HIS-CAU')

perfCounts <- function(popdf){
  print(table(popdf$diffR2 > 0.1))
  print(table(popdf$diffR2 > 0.1)/dim(popdf)[1])
  print(table(popdf$diffR2 > (-0.1) & popdf$diffR2 < 0.1))
  print(table(popdf$diffR2 > (-0.1) & popdf$diffR2 < 0.1)/dim(popdf)[1])
  print(table(popdf$diffR2 < (-0.1)))
  print(table(popdf$diffR2 < (-0.1))/dim(popdf)[1])
  print(dim(popdf)[1])
}

## AFA-CAU
perfCounts(ac)
## 
## FALSE  TRUE 
##  7857   472 
## 
##      FALSE       TRUE 
## 0.94333053 0.05666947 
## 
## FALSE  TRUE 
##  1502  6827 
## 
##     FALSE      TRUE 
## 0.1803338 0.8196662 
## 
## FALSE  TRUE 
##  7299  1030 
## 
##     FALSE      TRUE 
## 0.8763357 0.1236643 
## [1] 8329
## AFA-HIS
perfCounts(ah)
## 
## FALSE  TRUE 
##  8020   371 
## 
##      FALSE       TRUE 
## 0.95578596 0.04421404 
## 
## FALSE  TRUE 
##  1255  7136 
## 
##    FALSE     TRUE 
## 0.149565 0.850435 
## 
## FALSE  TRUE 
##  7507   884 
## 
##    FALSE     TRUE 
## 0.894649 0.105351 
## [1] 8391
## HIS-CAU
perfCounts(ch)
## 
## FALSE  TRUE 
##  8159   352 
## 
##      FALSE       TRUE 
## 0.95864176 0.04135824 
## 
## FALSE  TRUE 
##   695  7816 
## 
##      FALSE       TRUE 
## 0.08165903 0.91834097 
## 
## FALSE  TRUE 
##  8168   343 
## 
##      FALSE       TRUE 
## 0.95969921 0.04030079 
## [1] 8511
table(ac$mean_Fst > 0.3)
## 
## FALSE  TRUE 
##  8280    49
table(ac$mean_Fst > 0.3 & ac$diffR2 > 0)
## 
## FALSE  TRUE 
##  8312    17
grid.arrange(a, b, c, nrow=3)

ggplot(all,aes(x=pop1,y=mean_Fst,col=pop2)) + geom_point() + geom_density_2d(col='gray') + facet_wrap(~pop,nrow=3) + 
  scale_color_gradientn(colours = c('blue','pink')) + theme_bw(14) + 
  labs(x=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2)))

ggplot(all,aes(x=pop1,y=beta_Fst,col=pop2)) + geom_point() + geom_density_2d(col='gray') + facet_wrap(~pop,nrow=3) + 
   scale_color_gradientn(colours = c('blue','pink')) + theme_bw(14) + 
  labs(x=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2)))

ggplot(all,aes(x=pop1,y=beta_Fst,col=pop2)) + geom_point() + geom_density_2d(col='gray') + facet_wrap(~pop,nrow=3) + 
   scale_color_gradientn(colours = c('blue','pink')) + theme_bw(14) + coord_cartesian(ylim=c(0,0.02)) + 
  labs(x=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2)))

ggplot(all,aes(x=mean_Fst,col=pop)) + geom_density() + coord_cartesian(xlim=c(0,0.2))

ggplot(all,aes(x=beta_Fst,col=pop)) + geom_density() + coord_cartesian(xlim=c(0,0.01))

Sup Fig Fst

grid.arrange(sfig1, sfig2, nrow=1)